In this notebook, weekly regional COVID-19 data (cases, deaths and tests) for Poland, Czechia and Sweden are loaded, harmonised and prepared for clustering.
Decision made to only use cases/deaths data from Mar 16, 2020 to Mar 29, 2021 — the period covered by all three countries (lowest common denominator).
import covid19czechia as CZ
import covid19sweden as SE
import numpy as np
import plotly.express as px
import pandas as pd
from datetime import datetime, date, timedelta
import dtw
# utility functions
### 1
def week_num_to_date(year, week_num):
    """Return the Monday (as a datetime) that labels the given week.

    Uses the '%W' week-numbering convention of ``datetime.strptime``.
    For 2020 the result is shifted back by one week so the 2020 and 2021
    axes line up; for any other year ``None`` is returned (unsupported).
    """
    if year not in (2020, 2021):
        return None
    monday = datetime.strptime(f'{year}{week_num}-1', '%Y%W-%w')
    # 2020 labels are one week earlier than the raw %W Monday
    return monday - timedelta(days=7) if year == 2020 else monday
### 2
def daterange(start_date, end_date):
    """Yield dates from start_date to end_date (inclusive) in 7-day steps."""
    n_weeks = int((end_date - start_date).days / 7) + 1
    current = start_date
    for _ in range(n_weeks):
        yield current
        current += timedelta(7)
# get PL data
#file_name = './data/pl_google_sheet/' + datetime.today().strftime('%Y%m%d') + '.xlsx'
# NOTE(review): snapshot date is hardcoded; switch back to the commented line
# above to use today's downloaded file instead
file_name = './data/pl_google_sheet/' + '20210412' + '.xlsx'
### read in PL cases data (wide table: one column per reporting date)
pl_cases = pd.read_excel(io = file_name,
sheet_name='Wzrost w województwach',
dtype=object,
engine='openpyxl',
skiprows=7,
nrows=16)
### convert wide form data to long form (for facilitating future merge)
# the last two columns are excluded — presumably summary columns; TODO confirm
pl_value_vars = list(pl_cases.columns)[1:-2]
pl_cases = pd.melt(frame = pl_cases.iloc[:, 0:-2],
id_vars='Województwo',
value_vars=pl_value_vars,
var_name='date',
value_name='cases')
### get year and week data based on date
pl_cases['year'] = pl_cases['date'].apply(lambda x: int(x.year))
pl_cases['week'] = pl_cases['date'].apply(lambda x: int(x.isocalendar()[1]))
# ISO week 53 observed in January belongs to the previous ISO year
pl_cases.loc[(pl_cases['week'] == 53) & (pl_cases['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
# daily counts -> weekly totals per voivodeship
pl_cases = pl_cases.groupby(['year', 'week', 'Województwo']).aggregate({'cases': 'sum'}).reset_index()
# weekly date label = Monday of the (year, week) bucket
pl_cases['date'] = pl_cases.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
### read in PL deaths data (same sheet as cases, lower table starting at row 50)
pl_deaths = pd.read_excel(io = file_name,
sheet_name='Wzrost w województwach',
dtype=object,
engine='openpyxl',
skiprows=50,
nrows=16)
### convert wide form data to long form (for facilitating future merge)
# reuses pl_value_vars (the date columns) computed for the cases table above
pl_deaths = pd.melt(frame = pl_deaths.iloc[:, 0:-2],
id_vars='Województwo',
value_vars=pl_value_vars,
var_name='date',
value_name='deaths')
### get year and week data based on date
pl_deaths['year'] = pl_deaths['date'].apply(lambda x: int(x.year))
pl_deaths['week'] = pl_deaths['date'].apply(lambda x: int(x.isocalendar()[1]))
# ISO week 53 observed in January belongs to the previous ISO year
pl_deaths.loc[(pl_deaths['week'] == 53) & (pl_deaths['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
# daily counts -> weekly totals per voivodeship
pl_deaths = pl_deaths.groupby(['year', 'week', 'Województwo']).aggregate({'deaths': 'sum'}).reset_index()
# weekly date label = Monday of the (year, week) bucket
pl_deaths['date'] = pl_deaths.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
### read in 'new' PL tests data (tests after 28.12.2020)
pl_tests_new = pd.read_excel(io = file_name,
sheet_name='Testy w województwach',
dtype=object,
engine='openpyxl',
skiprows=3,
nrows=16)
### convert wide form data to long form (for facilitating future merge)
# NOTE(review): both melt arguments are evaluated against the ORIGINAL frame
# (assignment happens after), so value_vars [2:-2] is offset relative to the
# un-sliced sheet layout — TODO confirm the column bounds against the sheet
pl_tests_new = pd.melt(frame = pl_tests_new.iloc[:, 1:-2],
id_vars='Województwo',
value_vars=list(pl_tests_new.columns)[2:-2],
var_name='date',
value_name='tests')
### filter out old data
pl_tests_new = pl_tests_new[pl_tests_new['date'] >= '2020-12-28'] # only pick up data from 28 Dec 2020 or later
### get year and week data based on date
pl_tests_new['year'] = pl_tests_new['date'].apply(lambda x: int(x.year))
pl_tests_new['week'] = pl_tests_new['date'].apply(lambda x: int(x.isocalendar()[1]))
# ISO week 53 observed in January belongs to the previous ISO year
pl_tests_new.loc[(pl_tests_new['week'] == 53) & (pl_tests_new['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
# daily counts -> weekly totals per voivodeship
pl_tests_new = pl_tests_new.groupby(['year', 'week', 'Województwo']).aggregate({'tests': 'sum'}).reset_index()
# weekly date label = Monday of the (year, week) bucket
pl_tests_new['date'] = pl_tests_new.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
### read in 'old' PL tests data (tests up to 28.12.2020); these are CUMULATIVE totals
pl_wkly_tests_old = pd.read_excel(io = file_name,
sheet_name=' Testy w województwach od 11.05', # sheet name appears as 'Testy w województwach od 11.05 do 28.12.2020' in some files
dtype=object,
engine='openpyxl',
skiprows=2,
nrows=16,
verbose=False,
parse_dates=False,
date_parser=None)
# presumably the sheet carries a mis-encoded voivodeship name in row 12;
# hardcoded restore — verify against a fresh download
pl_wkly_tests_old.iloc[12, 0] = 'Świętokrzyskie'
### convert cumulative to weekly counts by subtracting a one-week-shifted copy
data_copy = pl_wkly_tests_old.iloc[:, 1:-6].copy()
# prepend a zero column so each week is diffed against the previous one
data_copy.insert(0, 'dummy', pd.Series([0]*16))
data_copy.iloc[12, 13] = 0 # hardcoded adjustment for 'Świętokrzyskie'
# relabel the shifted copy so the subtraction below aligns element-wise
data_copy.columns = pl_wkly_tests_old.iloc[:, 1:-5].columns
pl_wkly_tests_old.iloc[:, 1:-5] = pl_wkly_tests_old.iloc[:, 1:-5] - data_copy
### cache column names (dates)
pl_wkly_tests_old= pl_wkly_tests_old.iloc[:, 0:-5] # first remove NaN columns
col_names = list(pl_wkly_tests_old.columns[1:-1])
col_names = [pl_wkly_tests_old.columns[0]]+list(col_names)
### filter out first week tests info as that appears to be a cumulative figure which could not be corrected
### due to lack of data before that week
pl_wkly_tests_old.drop(columns = pl_wkly_tests_old.columns[1], inplace = True, errors = 'raise')
### adjust dates
# after dropping the first column, re-apply the cached labels so each weekly
# diff is attributed to the start of its week — TODO confirm intended shift
pl_wkly_tests_old.columns = col_names
### convert wide form data to long form (for facilitating future merge)
pl_wkly_tests_old = pd.melt(frame = pl_wkly_tests_old,
id_vars='Województwo',
value_vars=list(pl_wkly_tests_old.columns)[1:],
var_name='date',
value_name='tests')
### get year and week data based on date
pl_wkly_tests_old['year'] = pl_wkly_tests_old['date'].apply(lambda x: int(x.year))
pl_wkly_tests_old['week'] = pl_wkly_tests_old['date'].apply(lambda x: int(x.isocalendar()[1]))
### filter out new data
pl_wkly_tests_old = pl_wkly_tests_old[pl_wkly_tests_old['date'] < '2020-12-28'] # only pick up data from before 28 Dec 2020
# aggregate PL data
# left-join deaths onto cases, then left-join the combined (new + old) tests;
# weeks without tests data keep NaN in 'tests'
pl_data = pd.merge(pd.merge(pl_cases, pl_deaths, on = ['Województwo', 'date', 'year', 'week'], how = 'left'),
pd.concat([pl_tests_new, pl_wkly_tests_old]), on = ['Województwo', 'date', 'year', 'week'], how = 'left') \
.rename({'Województwo':'name'}, axis = 1)
# aggregate SE data
# the SE deaths feed also carries a 'confirmed' (cases) column; Sweden has no
# tests data in this pipeline
se_data = SE.covid_deaths()
se_data = se_data.groupby(['year', 'week', 'region']) \
.aggregate({'deaths':'sum', 'confirmed':'sum'}).reset_index()
# weekly date label = Monday of the (year, week) bucket
se_data['date'] = se_data.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
se_data = se_data.rename({'confirmed':'cases'}, axis = 1)
### read in CZ cases data (level = 2 — presumably region-level granularity; TODO confirm)
cz_cases = CZ.covid_confirmed(level = 2)
cz_cases['year'] = cz_cases['date'].apply(lambda x: x.year)
# ISO week 53 observed in January belongs to the previous ISO year
cz_cases.loc[(cz_cases['week'] == 53) & (cz_cases['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
# daily counts -> weekly totals per region
cz_cases = cz_cases.groupby(['year', 'week', 'region']).aggregate({'confirmed': 'sum'}).reset_index()
cz_cases['date'] = cz_cases.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
cz_cases = cz_cases.rename({'confirmed':'cases'}, axis = 1)
### read in CZ deaths data
cz_deaths = CZ.covid_deaths(level = 2)
cz_deaths['year'] = cz_deaths['date'].apply(lambda x: x.year)
cz_deaths.loc[(cz_deaths['week'] == 53) & (cz_deaths['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
cz_deaths = cz_deaths.groupby(['year', 'week', 'region']).aggregate({'deaths': 'sum'}).reset_index()
cz_deaths['date'] = cz_deaths.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
### read in CZ tests data
cz_tests = CZ.covid_tests(level = 2)
cz_tests['year'] = cz_tests['date'].apply(lambda x: x.year)
cz_tests.loc[(cz_tests['week'] == 53) & (cz_tests['date'].apply(lambda x:x.month) == 1), 'year'] -= 1
cz_tests = cz_tests.groupby(['year', 'week', 'region']).aggregate({'tests': 'sum'}).reset_index()
cz_tests['date'] = cz_tests.apply(lambda r: week_num_to_date(r.year, r.week), axis = 1)
# aggregate CZ data
# left-join deaths then tests onto cases; missing tests weeks stay NaN
cz_data = pd.merge(pd.merge(cz_cases, cz_deaths, on = ['region', 'date', 'year', 'week'], how = 'left'),
cz_tests, on = ['region', 'date', 'year', 'week'], how = 'left')
# set up the regions dataset
regions = pd.read_csv('./data/regions.csv')
# hardcoded cluster assignments per NUTS code — presumably the result of a
# prior clustering run; TODO confirm the source of these labels
regions.loc[regions['NUTS3'].isin(['SE214', 'SE322', 'SE221', 'SE212', 'SE213', 'SE321', \
'SE332', 'SE331', 'SE312', 'SE311', 'SE313', 'SE124', \
'SE122', 'SE125', 'CZ041', 'SE231', 'SE211', 'SE121']), 'cluster_1'] = 1
regions.loc[regions['NUTS3'].isin(['SE123', 'CZ051', 'CZ063', 'CZ053', 'CZ052', 'CZ032', \
'CZ031', 'CZ072', 'CZ071']), 'cluster_1'] = 2
regions.loc[regions['NUTS3'].isin(['CZ042', 'PL52', 'PL43', 'PL84', 'CZ064', 'PL72', \
'CZ080', 'CZ020', 'SE224', 'PL62', 'PL42', 'SE232']), 'cluster_1'] = 3
regions.loc[regions['NUTS3'].isin(['PL61', 'PL82', 'PL81', 'PL63', 'PL71', 'SE110']), 'cluster_1'] = 4
regions.loc[regions['NUTS3'].isin(['PL51', 'PL21', 'PL41', 'PL22', 'PL9']), 'cluster_1'] = 5
regions.loc[regions['NUTS3'] == 'CZ010', 'cluster_1'] = 6
# cluster_2 is cluster_1 with clusters 1 and 2 merged (labels shifted down by one)
regions['cluster_2'] = regions['cluster_1'] - 1
regions.loc[regions['cluster_2'] == 0, 'cluster_2'] = 1
regions = regions.rename(columns = {'NUTS3':'region'})
# cross reference the PL/CZ/SE datasets with info in the regions file
# PL joins on the region NAME; CZ/SE join on the NUTS code
pl_data = pd.merge(regions[['region', 'name', 'population']], pl_data, on='name', how='inner')
cz_data = pd.merge(regions[['region', 'name', 'population']], cz_data, on='region', how='inner')
se_data = pd.merge(regions[['region', 'name', 'population']], se_data, on='region', how='inner')
# concatenate all the three datasets
# NOTE(review): the original notebook ran the concat/rename/astype sequence
# twice back-to-back (a duplicated cell); the second run rebuilt all_data from
# the unchanged pl/cz/se frames, so a single pass is equivalent — the
# duplicate has been removed.
all_data = pd.concat([pl_data, cz_data, se_data])
# concatenate NUTS code and region name (e.g. 'PL51 - ...') for plot labels
all_data['region'] = all_data['region'] + ' - ' + all_data['name']
# fix datatype of 'tests' column (handle NaN)
all_data['tests'] = all_data['tests'].astype('float')
# compute deaths and cases per 100K capita, deaths per test, cases per test
all_data['cases_100K'] = all_data['cases']/all_data['population']*100000
all_data['deaths_100K'] = all_data['deaths']/all_data['population']*100000
all_data['tests_100K'] = all_data['tests']/all_data['population']*100000
all_data['cases_per_test'] = all_data['cases']/all_data['tests']
all_data['deaths_per_test'] = all_data['deaths']/all_data['tests']
all_data['deaths_per_case'] = all_data['deaths']/all_data['cases']
# exclude immature data: keep only Mar 16, 2020 .. Mar 29, 2021 (the window
# covered by all three countries)
all_data = all_data[all_data['date'] <= '2021-03-29']
all_data = all_data[all_data['date'] >= '2020-03-16']
# impute missing tests/cases/deaths
# derive the full week/region grid plus each country's tests reporting window
regions_series = all_data['region'].unique()
start_date = all_data['date'].min() # by construction, this is the min date of reporting of cases and deaths, viz. Mar 16, 2020
end_date = all_data['date'].max() # by construction, this is the max date of reporting of cases and deaths, viz. Mar 29, 2021
cz_tests_min_date = cz_tests['date'].min()
cz_tests_max_date = cz_tests['date'].max()
# PL tests come from two sources (new sheet + old cumulative sheet); span both
pl_tests_min_date = pd.concat([pl_tests_new, pl_wkly_tests_old])['date'].min()
pl_tests_max_date = pd.concat([pl_tests_new, pl_wkly_tests_old])['date'].max()
# walk the full week x region grid: create zero/NaN rows for missing weeks
# and zero-fill NaN metrics on existing rows
for dt in daterange(start_date, end_date):
    for reg in regions_series:
        mask = (all_data['date'] == dt) & (all_data['region'] == reg)
        record = all_data[mask]
        if record.shape[0] == 0:  # row does not exist - so create it
            year = dt.year
            week = dt.isocalendar()[1]
            # BUGFIX: the original condition `week == 53 & dt.month == 1`
            # used bitwise `&`, which binds tighter than `==`, so it could
            # never be true; with `and`, ISO week 53 observed in January is
            # assigned to the previous year, matching the groupby logic above.
            if week == 53 and dt.month == 1:
                year = year - 1
            # tests figures only exist inside each country's reporting window;
            # outside it they stay NaN rather than an artificial zero
            if reg.startswith('PL') and pl_tests_min_date <= dt <= pl_tests_max_date:
                tests = tests_100K = cases_per_test = deaths_per_test = 0
            elif reg.startswith('CZ') and cz_tests_min_date <= dt <= cz_tests_max_date:
                tests = tests_100K = cases_per_test = deaths_per_test = 0
            else:
                tests = tests_100K = cases_per_test = deaths_per_test = np.nan
            append_record = {'region': reg,
                             'year': year,
                             'week': week,
                             'cases': 0,
                             'date': dt,
                             'deaths': 0,
                             'tests': tests,
                             'cases_100K': 0,
                             'deaths_100K': 0,
                             'tests_100K': tests_100K,
                             'cases_per_test': cases_per_test,
                             'deaths_per_test': deaths_per_test,
                             'deaths_per_case': 0
                             }
            # DataFrame.append was removed in pandas 2.0; concat is the
            # documented equivalent and behaves identically here
            all_data = pd.concat([all_data, pd.DataFrame([append_record])],
                                 ignore_index=True)
        else:
            if pd.isnull(record['cases']).item():
                all_data.loc[mask, ['cases', 'cases_100K', 'cases_per_test']] = 0
            if pd.isnull(record['deaths']).item():
                all_data.loc[mask, ['deaths', 'deaths_100K', 'deaths_per_test', 'deaths_per_case']] = 0
                # Note that an assumption here is there will never be deaths if there are no cases
            if pd.isnull(record['tests']).item():
                # zero-fill tests only inside the country's reporting window
                if reg.startswith('PL') and pl_tests_min_date <= dt <= pl_tests_max_date:
                    all_data.loc[mask, ['tests', 'tests_100K', 'cases_per_test', 'deaths_per_test']] = 0
                if reg.startswith('CZ') and cz_tests_min_date <= dt <= cz_tests_max_date:
                    all_data.loc[mask, ['tests', 'tests_100K', 'cases_per_test', 'deaths_per_test']] = 0
                # Note that an assumption here is there will never be cases/deaths if there are no tests
# visualise trends in all regions
# each panel drops rows where the plotted metric is NaN so plotly does not
# draw artefacts for missing data
# 1
fig = px.line(all_data[pd.notnull(all_data['cases'])], x='date', y='cases', color='region',
              title='Weekly Cases by Region')
fig.show()
# 2
fig = px.line(all_data[pd.notnull(all_data['cases_100K'])], x='date', y='cases_100K', color='region',
              title='Weekly Cases per 100K Capita by Region')
fig.show()
# 3
fig = px.line(all_data[pd.notnull(all_data['deaths'])], x='date', y='deaths', color='region',
              title='Weekly COVID19 Deaths by Region')
fig.show()
# 4
fig = px.line(all_data[pd.notnull(all_data['deaths_100K'])], x='date', y='deaths_100K', color='region',
              title='Weekly COVID19 Deaths per 100K Capita by Region')
fig.show()
# 5
fig = px.line(all_data[pd.notnull(all_data['tests'])], x='date', y='tests', color='region',
              title='Weekly Tests by Region')
fig.show()
# 6
# BUGFIX: this panel plotted y='tests' although the title says "per 100K
# Capita"; plot the tests_100K column the filter already selects on
fig = px.line(all_data[pd.notnull(all_data['tests_100K'])], x='date', y='tests_100K', color='region',
              title='Weekly Tests per 100K Capita by Region')
fig.show()
# 7
fig = px.line(all_data[pd.notnull(all_data['cases_per_test'])], x='date', y='cases_per_test', color='region',
              title='Weekly Cases per Test by Region')
fig.show()
# 8
fig = px.line(all_data[pd.notnull(all_data['deaths_per_test'])], x='date', y='deaths_per_test', color='region',
              title='Weekly COVID19 Deaths per Test by Region')
fig.show()
# 9
fig = px.line(all_data[pd.notnull(all_data['deaths_per_case'])], x='date', y='deaths_per_case', color='region',
              title='Weekly COVID19 Deaths per Case by Region')
fig.show()
Four distance matrices (max weekly difference and DTW distance, each for cases and deaths per 100K capita) are computed below as inputs for clustering the regions:
# obtain distance matrices for clustering
### initialise
# symmetric pairwise region-distance matrices:
#   dm1/dm2     — max absolute weekly difference of cases/deaths per 100K
#   dm1_dtw/dm2_dtw — dynamic-time-warping normalized distance on the same series
dm1 = np.zeros([len(regions_series), len(regions_series)])
dm2 = np.zeros([len(regions_series), len(regions_series)])
dm1_dtw = np.zeros([len(regions_series), len(regions_series)])
dm2_dtw = np.zeros([len(regions_series), len(regions_series)])
### compute and store distances
for reg_id, reg in enumerate(regions_series):
    time_series_1_dm1 = all_data[(all_data['region'] == reg) & pd.notnull(all_data['cases_100K'])] \
        [['cases_100K', 'date']].sort_values(by = 'date').set_index('date').transpose()
    time_series_1_dm2 = all_data[(all_data['region'] == reg) & pd.notnull(all_data['deaths_100K'])] \
        [['deaths_100K', 'date']].sort_values(by = 'date').set_index('date').transpose()
    ### NOTE: transpose being done to facilitate date-wise difference calculations
    # only the upper triangle is computed; the lower triangle is mirrored
    for reg_id_2 in range(reg_id + 1, len(regions_series)):
        # NOTE(review): the second series is NOT pre-filtered for NaN like the
        # first; NaNs are instead dropped from the differences below — presumably
        # intentional, TODO confirm it does not skew the DTW distances
        time_series_2_dm1 = \
            all_data[all_data['region'] == regions_series[reg_id_2]][['cases_100K', 'date']] \
            .sort_values(by = 'date').set_index('date').transpose()
        time_series_2_dm2 = \
            all_data[all_data['region'] == regions_series[reg_id_2]][['deaths_100K', 'date']]\
            .sort_values(by = 'date').set_index('date').transpose()
        diff_1 = abs(time_series_1_dm1 - time_series_2_dm1).transpose()
        diff_1 = diff_1[pd.notnull(diff_1['cases_100K'])]
        diff_2 = abs(time_series_1_dm2 - time_series_2_dm2).transpose()
        diff_2 = diff_2[pd.notnull(diff_2['deaths_100K'])]
        ### NOTE: transpose being done here (again) to exclude null values, which are
        ### generated if one time-series has no value for a given date, but the other does
        dm1[reg_id, reg_id_2] = diff_1.max().item()
        dm1[reg_id_2, reg_id] = dm1[reg_id, reg_id_2]
        dm2[reg_id, reg_id_2] = diff_2.max().item()
        dm2[reg_id_2, reg_id] = dm2[reg_id, reg_id_2]
        dm1_dtw[reg_id, reg_id_2] = dtw.dtw(time_series_1_dm1, time_series_2_dm1).normalizedDistance
        dm1_dtw[reg_id_2, reg_id] = dm1_dtw[reg_id, reg_id_2]
        dm2_dtw[reg_id, reg_id_2] = dtw.dtw(time_series_1_dm2, time_series_2_dm2).normalizedDistance
        dm2_dtw[reg_id_2, reg_id] = dm2_dtw[reg_id, reg_id_2]
# export data files to csv
# rename(dict(enumerate(...))) relabels the integer index with region names,
# so the exported CSVs carry region labels on both axes
pd.DataFrame(dm1, columns = regions_series).rename(dict(enumerate(regions_series))) \
.to_csv('./data/clustering_distance_datasets/ts_cases_100k_wo_dtw.csv')
pd.DataFrame(dm2, columns = regions_series).rename(dict(enumerate(regions_series))) \
.to_csv('./data/clustering_distance_datasets/ts_deaths_100k_wo_dtw.csv')
pd.DataFrame(dm1_dtw, columns = regions_series).rename(dict(enumerate(regions_series))) \
.to_csv('./data/clustering_distance_datasets/ts_cases_100k_w_dtw.csv')
pd.DataFrame(dm2_dtw, columns = regions_series).rename(dict(enumerate(regions_series))) \
.to_csv('./data/clustering_distance_datasets/ts_deaths_100k_w_dtw.csv')
# visualise trends in Polish regions only (region codes start with 'PL')
# 1
fig = px.line(all_data[(pd.notnull(all_data['cases'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='cases', color='region', title='Weekly Cases by Region - Poland')
fig.show()
# 2
fig = px.line(all_data[(pd.notnull(all_data['cases_100K'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='cases_100K', color='region', title='Weekly Cases per 100K Capita by Region - Poland')
fig.show()
# 3
fig = px.line(all_data[(pd.notnull(all_data['deaths'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='deaths', color='region', title='Weekly COVID19 Deaths by Region - Poland')
fig.show()
# 4
fig = px.line(all_data[(pd.notnull(all_data['deaths_100K'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='deaths_100K', color='region',
              title='Weekly COVID19 Deaths per 100K Capita by Region - Poland')
fig.show()
# 5
fig = px.line(all_data[(pd.notnull(all_data['tests'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='tests', color='region', title='Weekly Tests by Region - Poland')
fig.show()
# 6
# BUGFIX: this panel plotted y='tests' although the title says "per 100K
# Capita"; plot the tests_100K column the filter already selects on
fig = px.line(all_data[(pd.notnull(all_data['tests_100K'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='tests_100K', color='region', title='Weekly Tests per 100K Capita by Region - Poland')
fig.show()
# 7
fig = px.line(all_data[(pd.notnull(all_data['cases_per_test'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='cases_per_test', color='region', title='Weekly Cases per Test by Region - Poland')
fig.show()
# 8
fig = px.line(all_data[(pd.notnull(all_data['deaths_per_test'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='deaths_per_test', color='region',
              title='Weekly COVID19 Deaths per Test by Region - Poland')
fig.show()
# 9
fig = px.line(all_data[(pd.notnull(all_data['deaths_per_case'])) & (all_data['region'].str.startswith('PL'))],
              x='date', y='deaths_per_case', color='region',
              title='Weekly COVID19 Deaths per Case by Region - Poland')
fig.show()
# visualise trends in Czech regions only (region codes start with 'CZ')
# 1
fig = px.line(all_data[(pd.notnull(all_data['cases'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='cases', color='region', title='Weekly Cases by Region - Czechia')
fig.show()
# 2
fig = px.line(all_data[(pd.notnull(all_data['cases_100K'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='cases_100K', color='region', title='Weekly Cases per 100K Capita by Region - Czechia')
fig.show()
# 3
fig = px.line(all_data[(pd.notnull(all_data['deaths'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='deaths', color='region', title='Weekly COVID19 Deaths by Region - Czechia')
fig.show()
# 4
fig = px.line(all_data[(pd.notnull(all_data['deaths_100K'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='deaths_100K', color='region',
              title='Weekly COVID19 Deaths per 100K Capita by Region - Czechia')
fig.show()
# 5
fig = px.line(all_data[(pd.notnull(all_data['tests'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='tests', color='region', title='Weekly Tests by Region - Czechia')
fig.show()
# 6
# BUGFIX: this panel plotted y='tests' although the title says "per 100K
# Capita"; plot the tests_100K column the filter already selects on
fig = px.line(all_data[(pd.notnull(all_data['tests_100K'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='tests_100K', color='region', title='Weekly Tests per 100K Capita by Region - Czechia')
fig.show()
# 7
fig = px.line(all_data[(pd.notnull(all_data['cases_per_test'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='cases_per_test', color='region', title='Weekly Cases per Test by Region - Czechia')
fig.show()
# 8
fig = px.line(all_data[(pd.notnull(all_data['deaths_per_test'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='deaths_per_test', color='region',
              title='Weekly COVID19 Deaths per Test by Region - Czechia')
fig.show()
# 9
fig = px.line(all_data[(pd.notnull(all_data['deaths_per_case'])) & (all_data['region'].str.startswith('CZ'))],
              x='date', y='deaths_per_case', color='region',
              title='Weekly COVID19 Deaths per Case by Region - Czechia')
fig.show()
# visualise trends in Swedish regions only (region codes start with 'SE');
# no tests-based panels here because Sweden has no tests data in this pipeline
_se_panels = [
    ('cases', 'Weekly Cases by Region - Sweden'),
    ('cases_100K', 'Weekly Cases per 100K Capita by Region - Sweden'),
    ('deaths', 'Weekly COVID19 Deaths by Region - Sweden'),
    ('deaths_100K', 'Weekly COVID19 Deaths per 100K Capita by Region - Sweden'),
    ('deaths_per_case', 'Weekly COVID19 Deaths per Case by Region - Sweden'),
]
for _metric, _title in _se_panels:
    # drop NaN rows for the plotted metric, keep only Swedish regions
    _subset = all_data[(pd.notnull(all_data[_metric])) & (all_data['region'].str.startswith('SE'))]
    fig = px.line(_subset, x='date', y=_metric, color='region', title=_title)
    fig.show()